
##################################################
## Human Trafficking Indicators: A New Dataset ##
## Author: Richard W Frank                     ##
## Journal: International Interactions         ##
## Code written: April 14, 2021                ##
## Software: rstan ver. 2.15.1, R ver. 3.3.1   ##
## Purpose: STATIC ANALYSIS AND PLOTS          ## 
#################################################

 
rm(list=ls())
library(rstan)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
library(ggplot2)
library(rstan)
library(rstanarm)
library(bayesplot)
library(loo)

## change working directory 
setwd("~")
load("hti_static.RData")

mcmc_trace(fit, regex_pars = "beta")
mcmc_trace(fit, regex_pars = "alpha")

output <- extract(fit, permuted = TRUE)

######################################
### GENERATING DATASETS OF RESULTS ###
######################################

## latent thetas ##
 
theta_mean <- apply(output$x, 2, mean)
theta_sd <- apply(output$x, 2, sd)
data$theta_mean <- NA
data$theta_sd <- NA
for(ii in 1:nrow(data)){
  data$theta_mean[ii] <- theta_mean[id[ii]]
  data$theta_sd[ii] <- theta_sd[id[ii]]
}

write.csv(dplyr::select(data,ccode,year,theta_mean,theta_sd),
          file="hti_static_thetas.csv", sep="_",na="",row.names=F)

### GENERATING LL AND PAR DATA ###
var.names <- names(output)
ll <- extract_log_lik(fit, grep("ll_", var.names, value=T))
write.csv(ll, file=paste(model, "static_ll.csv", sep="_"))
post.var <- as.matrix(fit, par=var.names[!grepl("ll_", var.names)])
write.csv(post.var, file=paste(model, "static_par.csv", sep="_"), row.names=F)
 
### Discrimination (i.e. BETAS) ########
beta2 = cbind(apply(output$beta_k2,2,mean),apply(output$beta_k2,2,quantile,.025),apply(output$beta_k2,2,quantile,.975))
beta5 = cbind(apply(output$beta_k5,2,mean),apply(output$beta_k5,2,quantile,.025),apply(output$beta_k5,2,quantile,.975))
betas <-rbind(beta2, beta5)
rownames(betas)<-c("dest",
                   "ldest",
                   "pdest",
                   "ddest",
                   "dsdest",
                   "cldest", 
                   "cpdest",
                   "u_vict_5", 
                   "c_sex_5", 
                   "c_labor_5", 
                   "c_vic_5")

## writing to csv to plot in Stata
write.csv(betas, "static_betas.csv", row.names=TRUE)

#### Difficulty ####

PARAMETERS_s_k2 = cbind(apply(output$alpha_k2,2,mean),apply(output$alpha_k2,2,sd),apply(output$alpha_k2, 2, quantile, 0.975 ),apply(output$alpha_k2, 2, quantile, 0.025 ),
                        apply(output$beta_k2,2,mean),apply(output$beta_k2,2,sd),apply(output$beta_k2, 2, quantile, 0.975 ),apply(output$beta_k2, 2, quantile, 0.025 ) )
colnames(PARAMETERS_s_k2)<-c("c_mean","c_sd","c_975","c_025","b_mean","b_sd","b_975","b_025")
rownames(PARAMETERS_s_k2)<-c("dest","ldest", "pdest", "ddest", "dsdest", "cldest",  "cpdest")
alpha2 = PARAMETERS_s_k2[,1:4]
colnames(alpha2) <-c("c1_mean", "c1_sd",   "c1_975" , "c1_025")
PARAMETERS_s_k5 = cbind(apply(output$c_k5[,,1],2,mean),apply(output$c_k5[,,1],2,sd),apply(output$c_k5[,,1], 2, quantile, 0.975 ),apply(output$c_k5[,,1], 2, quantile, 0.025),
                        apply(output$c_k5[,,2],2,mean),apply(output$c_k5[,,2],2,sd),apply(output$c_k5[,,2], 2, quantile, 0.975 ),apply(output$c_k5[,,2], 2, quantile, 0.025),
                        apply(output$c_k5[,,3],2,mean),apply(output$c_k5[,,3],2,sd),apply(output$c_k5[,,3], 2, quantile, 0.975 ),apply(output$c_k5[,,3], 2, quantile, 0.025),
                        apply(output$c_k5[,,4],2,mean),apply(output$c_k5[,,4],2,sd),apply(output$c_k5[,,4], 2, quantile, 0.975 ),apply(output$c_k5[,,4], 2, quantile, 0.025),                        
                        apply(output$beta_k5,2,mean),apply(output$beta_k5,2,sd),apply(output$beta_k5, 2, quantile, 0.975 ),apply(output$beta_k5, 2, quantile, 0.025 ) )
colnames(PARAMETERS_s_k5)<-c("c1_mean","c1_sd","c1_975","c1_025",
                             "c2_mean","c2_sd","c2_975","c2_025",
                             "c3_mean", "c3_sd", "c3_975", "c3_025",
                             "c4_mean", "c4_sd", "c4_975", "c4_025",
                             "b_mean","b_sd","b_975","b_025")
rownames(PARAMETERS_s_k5)<-c("u_vict_5", "c_sex_5",  "c_labor_5",  "c_vic_5")
alpha5 = PARAMETERS_s_k5[,1:16]
alphas_1 <-rbind.fill(as.data.frame(alpha2), as.data.frame(alpha5))
rownames(alphas_1)<-c("dest",
                      "ldest",
                      "pdest",
                      "ddest",
                      "dsdest",
                      "cldest", 
                      "cpdest",
                      "u_vict_5", 
                      "c_sex_5", 
                      "c_labor_5", 
                      "c_vic_5")

alphas_1 <- alphas_1 [order(alphas_1 [,1]),]

## writing to csv to plot in Stata
write.csv(alphas_1, "static_alphas.csv", row.names=TRUE)
 
#############################
### ASSESSING CONVERGENCE ###
#############################
 
## 1. GELMAN-RUBIN Criterion for convergence
library(coda)
gelman.diag(fit)

## 2. Monte Carlo Error
MC_error(fit)

###################
## 3. Traceplots ##
    
    ## the thing to look for here is visualizing the posterior sample 
    ## Useful example: https://cran.r-project.org/web/packages/JointAI/vignettes/AfterFitting.html  
    ## When the sampler has converged the chains show one horizontal band.
    ## These trace plots would then suggest that both models have converged. 
    ## For all parameters, the four chains have mixed and there are no clear trends.

## starting only with theta ##

lp <- log_posterior(fit)
np <- nuts_params(fit)
posterior <- as.array(fit)

mcmc_trace(posterior, np = np, window = c(1000,2000)) +
  xlab("Post-warmup iteration")

traceplot(output$theta_raw, inc_warmup = TRUE )

traceplot(posterior_static, start=1000, ncol = 3, use_ggplot = TRUE) +
  theme(legend.position = 'bottom')  
 
fit %>%
  mcmc_trace()

######################
## 4. Density plots ##
 
densplot(fit, start=1000, ncol = 3, col = c("darkred", "darkblue", "darkgreen"),
         vlines = list(list(v = c(rep(0, length(coef(fit) )), NA),
        col = grey(0.8))))


##############
## 5. RHATS ##

## Rhat is a convergence diagnostic which compares parameter estimates across the chains. 
## FROM: https://blog.methodsconsultants.com/posts/introduction-to-stan-in-r/
## If the chains have converged and mixed well, then the Rhat value should be near 1. 
## If the chains have not converged to the same value, then the Rhat value will be larger than 1. 
## Rhat values of 1.05 or higher suggest a convergence issue. 


## another way to look at rhats 
rhats <- rhat(fit)
write.csv(rhats, "static_rhats.csv", row.names=TRUE)
 
## EFFECTIVE SAMPLE SIZE

neff=summary(fit)$summary[,'n_eff']
neff_ratio<-neff_ratio(fit)
mcmc_neff(neff_ratio, size = 2)
write.csv(neff_ratio, "static_neff_ratio.csv", row.names=TRUE) 
 
 
## Sampler
# all chains combined
sampler_params <- get_sampler_params(fit, inc_warmup = TRUE)
summary(do.call(rbind, sampler_params), digits = 2)

## Posterior predictive checks ##
## seeing in histograms whether predicted data from theta estimates match with the observed data.
 
y <- fit$x